##################### Appended VMW TRT (23, 19, 3) DiD regressions ####################

output_tables_dir <- file.path(getwd(), "output/")

#------------------------------- Load the Data ---------------------#

df_trt_23 <- read.csv("data/tmp data/10_percent_sample_DIVISION_trt_23_distance_Q_clt.csv")
df_trt_19 <- read.csv("data/tmp data/10_percent_sample_DIVISION_trt_19_distance_Q_clt.csv")
df_trt_3 <- read.csv("data/tmp data/10_percent_sample_DIVISION_trt_3_distance_Q_clt.csv")
# Existing column only found in trt_23 raw data 
df_trt_23 <- df_trt_23 %>%
  select(-existing)

df_trt_19 <- df_trt_19 %>%
  rename(nearest_metro_market = nearest_metro_area)

# all_columns <- unique(c(names(df_trt_23), names(df_trt_19), names(df_trt_3)))
# 
# comparison <- tibble::tibble(
#   column_name = all_columns, 
#   in_trt_23 = column_name %in% names(df_trt_23), 
#   in_trt_19 = column_name %in% names(df_trt_19),
#   in_trt_3 = column_name %in% names(df_trt_3),
# )
# print(comparison)


df_distance <- rbind(df_trt_23, df_trt_19, df_trt_3)

df_distance <- df_distance%>% 
  mutate(cz = czone)

stacked_policy_firm_wages_data <- read.csv("data/credit_bureau/stacked_policy_firm_wages_dataset.csv")

#### Merge the Data 
df <- left_join(df_distance, stacked_policy_firm_wages_data, by = c("cz", "etime", "trt_exp"))

#-----------------------------------------------------------#
# Prep Regression Analysis 

#Create post_period dummy
df = df %>% 
  mutate(post_period24 = ifelse(etime > -1, 1, 0))

df = df %>% 
  mutate(post_period12 = case_when(
    etime >= 0 & etime <= 6 ~ 1, 
    etime <= -1 & etime >= -6 ~ 0, 
    TRUE ~ NA_real_
  ))

df$distance_morethan_q1 <- ifelse(df$distance_quantile > 1 , 1, 0)

df$distance_morethan_q2 <- ifelse(df$distance_quantile > 2, 1, 0)

df_q_subset = df %>%
  filter(distance_quantile < 5)

df <- df %>% 
  mutate(super_gap_std = as.vector(scale(super_gap)),
         T_std = as.vector(scale(T)))

df_q_subset <- df_q_subset %>% 
  mutate(super_gap_std = as.vector(scale(super_gap)),
         T_std = as.vector(scale(T)))

#-----------------------------------------------------------#
# Variable Definitions

policy_y = c("ln_wage_policy")

y = c("ln_avg_wage_exact")

#Define set of treatment variables of interest
treat_var_vector = c("T_std") # , "super_gap_std"

#period_var = c("post_period24", "post_period12")
period_var = c("post_period12")

#-----------------------------------------------------------#
## Regression Analysis

for(p in period_var){
  
  for(t in treat_var_vector){
    
    #Set the according variable for the unstandardized treatment variable
    unst_t = substr(t, start = 1, stop = nchar(t) - 4)
    
    #Set a dictionary for variables' names
    dict = c(ln_avg_wage_exact = "Log Average Wage",
             T_std = "Large Retailer Gap (std.)",
             #post_period = "Post",
             clt_client = "Firm",
             czone = "CZ",
             etime = "Event time",
             trt_exp = "")
    
    fixest::setFixest_dict(dict)
    
    #Define formula for regression (this is necessary to loop over different dependent variables)
    formula1a = as.formula(paste(policy_y, "~ ", t, "*", p,  "| 
                              clt_client^trt_exp + czone^trt_exp + etime^trt_exp"))
    formula1b = as.formula(paste(y, "~ ", t, "*", p, "* distance_morethan_q1 +", t, "*", p, "* distance_morethan_q2", "| 
                              clt_client^trt_exp + czone^trt_exp + etime^trt_exp"))
    
    fit1a = fixest::feols(formula1a, cluster = ~czone, data = df)
    fit1b = fixest::feols(formula1b, cluster = ~czone, data = df)
    
    sd_unst_ta = df[fixest::obs(fit1a), ] %>% 
      summarise(sd = sd(!!sym(unst_t))) %>%
      pull()
    
    sd_unst_tb = df[fixest::obs(fit1b), ] %>% 
      summarise(sd = sd(!!sym(unst_t))) %>%
      pull()
    
    cz_count <- n_distinct(df$czone)
    clt_client_count <- n_distinct(df$clt_client)
    
    table_style <- fixest::style.tex(main = "aer",
                                     model.format = "",
                                     #model.title = "\\textit{OLS Dep. var: Cross Wage Elasticity}",
                                     yesNo = "Y",
                                     depvar.title = "Dep. var:",
                                     depvar.style = "*", #italic dependent variable title
                                     fixef.title = "\\midrule",
                                     fixef.suffix = "FEs",
                                     fixef.where = "var",
                                     line.top = "double"
    )
    
    #Output customized TEX table
    fixest::etable(fit1a, fit1b,
                   tex = T,
                   style.tex = table_style,
                   depvar = F, #remove dependent variable title from the top part of the table
                   headers = c("Log Average Policy Wage", "Log Average Wage Exact"),
                   fitstat = ~ n + r2,
                   #group = c("OLS", "IV"),
                   file = file.path(
                     output_tables_dir,
                     paste0("VMW_DIVISION_distance_OLS_10_percent", y, "_treat_var_", t, p, ".tex")
                   ),
                   replace = TRUE,
                   #row.labels = c("OLS", "IV"),
                   extralines = list( "CZ's" = c(cz_count, cz_count),
                                      "Number of Firms" = c(clt_client_count, clt_client_count),
                                      "SD treat. var. (unstd.)" = c(sd_unst_ta, sd_unst_tb)
                   )
                   #model.order
    )
    
    
  }
}

### SUBSET ###

for(p in period_var){
  
  for(t in treat_var_vector){
    
    #Set the according variable for the unstandardized treatment variable
    unst_t = substr(t, start = 1, stop = nchar(t) - 4)
    
    #Set a dictionary for variables' names
    dict = c(ln_avg_wage_exact = "Log Average Wage",
             T_std = "Large Retailer Gap (std.)",
             #post_period = "Post",
             clt_client = "Firm",
             czone = "CZ",
             etime = "Event time",
             trt_exp = "")
    
    fixest::setFixest_dict(dict)
    
    #Define formula for regression (this is necessary to loop over different dependent variables)
    formula1a = as.formula(paste(policy_y, "~ ", t, "*", p, "| 
                              clt_client^trt_exp + czone^trt_exp + etime^trt_exp"))
    formula1b = as.formula(paste(y, "~ ", t, "*", p, "* distance_morethan_q1 +", t, "*", p, "* distance_morethan_q2", "| 
                              clt_client^trt_exp + czone^trt_exp + etime^trt_exp"))
    
    fit1a = fixest::feols(formula1a, cluster = ~czone, data = df_q_subset)
    fit1b = fixest::feols(formula1b, cluster = ~czone, data = df_q_subset)
    
    sd_unst_ta = df_q_subset[fixest::obs(fit1a), ] %>% 
      summarise(sd = sd(!!sym(unst_t))) %>%
      pull()
    
    sd_unst_tb = df_q_subset[fixest::obs(fit1b), ] %>% 
      summarise(sd = sd(!!sym(unst_t))) %>%
      pull()
    
    cz_count <- n_distinct(df_q_subset$czone)
    clt_client_count <- n_distinct(df_q_subset$clt_client)
    
    table_style <- fixest::style.tex(main = "aer",
                                     model.format = "",
                                     #model.title = "\\textit{OLS Dep. var: Cross Wage Elasticity}",
                                     yesNo = "Y",
                                     depvar.title = "Dep. var:",
                                     depvar.style = "*", #italic dependent variable title
                                     fixef.title = "\\midrule",
                                     fixef.suffix = "FEs",
                                     fixef.where = "var",
                                     line.top = "double"
    )
    
    #Output customized TEX table
    fixest::etable(fit1a, fit1b,
                   tex = T,
                   style.tex = table_style,
                   depvar = F, #remove dependent variable title from the top part of the table
                   headers = c("Log Average Policy Wage", "Log Average Wage Exact"),
                   fitstat = ~ n + r2,
                   #group = c("OLS", "IV"),
                   file = file.path(
                     output_tables_dir,
                     paste0("DIVISION_distance_OLS_10_percent_Qsubset", y, "_treat_var_", t, p, ".tex")
                   ),
                   replace = TRUE,
                   #row.labels = c("OLS", "IV"),
                   extralines = list( "CZ's" = c(cz_count, cz_count),
                                      "Number of Firms" = c(clt_client_count, clt_client_count),
                                      "SD treat. var. (unstd.)" = c(sd_unst_ta, sd_unst_tb)
                   )
                   #model.order
    )
    
    
  }
}

